/*-------------------------------------------------------------------------
 *
 * execGrouping.c
 *      executor utility routines for grouping, hashing, and aggregation
 *
 * Note: we currently assume that equality and hashing functions are not
 * collation-sensitive, so the code in this file has no support for passing
 * collation settings through from callers.  That may have to change someday.
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * This source code file contains modifications made by THL A29 Limited ("Tencent Modifications").
 * All Tencent Modifications are Copyright (C) 2023 THL A29 Limited.
 *
 * IDENTIFICATION
 *      src/backend/executor/execGrouping.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/parallel.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"

static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
static int    TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);

/*
 * Define parameters for tuple hash table code generation. The interface is
 * *also* declared in execnodes.h (to generate the types, which are externally
 * visible).
 */
#define SH_PREFIX tuplehash
#define SH_ELEMENT_TYPE TupleHashEntryData
#define SH_KEY_TYPE MinimalTuple
#define SH_KEY firstTuple
#define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
#define SH_SCOPE extern
#define SH_STORE_HASH
#define SH_GET_HASH(tb, a) a->hash
#define SH_DEFINE
#include "lib/simplehash.h"


/*****************************************************************************
 *        Utility routines for grouping tuples together
 *****************************************************************************/

/*
 * execTuplesMatch
 *        Return true if two tuples match in all the indicated fields.
 *
 * This actually implements SQL's notion of "not distinct".  Two nulls
 * match, a null and a not-null don't match.
 *
 * slot1, slot2: the tuples to compare (must have same columns!)
 * numCols: the number of attributes to be examined
 * matchColIdx: array of attribute column numbers
 * eqFunctions: array of fmgr lookup info for the equality functions to use
 * evalContext: short-term memory context for executing the functions
 *
 * NB: evalContext is reset each time!
 */
bool
execTuplesMatch(TupleTableSlot *slot1,
                TupleTableSlot *slot2,
                int numCols,
                AttrNumber *matchColIdx,
                FmgrInfo *eqfunctions,
                MemoryContext evalContext)
{
    MemoryContext oldContext;
    bool        result;
    int            i;

    /* Reset and switch into the temp context. */
    MemoryContextReset(evalContext);
    oldContext = MemoryContextSwitchTo(evalContext);

    /*
     * We cannot report a match without checking all the fields, but we can
     * report a non-match as soon as we find unequal fields.  So, start
     * comparing at the last field (least significant sort key). That's the
     * most likely to be different if we are dealing with sorted input.
     */
    result = true;

    for (i = numCols; --i >= 0;)
    {
        AttrNumber    att = matchColIdx[i];
        Datum        attr1,
                    attr2;
        bool        isNull1,
                    isNull2;

        attr1 = slot_getattr(slot1, att, &isNull1);

        attr2 = slot_getattr(slot2, att, &isNull2);

        if (isNull1 != isNull2)
        {
            result = false;        /* one null and one not; they aren't equal */
            break;
        }

        if (isNull1)
            continue;            /* both are null, treat as equal */

        /* Apply the type-specific equality function */

        if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
                                        attr1, attr2)))
        {
            result = false;        /* they aren't equal */
            break;
        }
    }

    MemoryContextSwitchTo(oldContext);

    return result;
}

/*
 * execTuplesUnequal
 *        Return true if two tuples are definitely unequal in the indicated
 *        fields.
 *
 * Nulls are neither equal nor unequal to anything else.  A true result
 * is obtained only if there are non-null fields that compare not-equal.
 *
 * Parameters are identical to execTuplesMatch.
 */
bool
execTuplesUnequal(TupleTableSlot *slot1,
                  TupleTableSlot *slot2,
                  int numCols,
                  AttrNumber *matchColIdx,
                  FmgrInfo *eqfunctions,
                  MemoryContext evalContext)
{
    MemoryContext oldContext;
    bool        result;
    int            i;

    /* Reset and switch into the temp context. */
    MemoryContextReset(evalContext);
    oldContext = MemoryContextSwitchTo(evalContext);

    /*
     * We cannot report a match without checking all the fields, but we can
     * report a non-match as soon as we find unequal fields.  So, start
     * comparing at the last field (least significant sort key). That's the
     * most likely to be different if we are dealing with sorted input.
     */
    result = false;

    for (i = numCols; --i >= 0;)
    {
        AttrNumber    att = matchColIdx[i];
        Datum        attr1,
                    attr2;
        bool        isNull1,
                    isNull2;

        attr1 = slot_getattr(slot1, att, &isNull1);

        if (isNull1)
            continue;            /* can't prove anything here */

        attr2 = slot_getattr(slot2, att, &isNull2);

        if (isNull2)
            continue;            /* can't prove anything here */

        /* Apply the type-specific equality function */

        if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
                                        attr1, attr2)))
        {
            result = true;        /* they are unequal */
            break;
        }
    }

    MemoryContextSwitchTo(oldContext);

    return result;
}


/*
 * execTuplesMatchPrepare
 *        Look up the equality functions needed for execTuplesMatch or
 *        execTuplesUnequal, given an array of equality operator OIDs.
 *
 * The result is a palloc'd array.
 */
FmgrInfo *
execTuplesMatchPrepare(int numCols,
                       Oid *eqOperators)
{
    FmgrInfo   *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
    int            i;

    for (i = 0; i < numCols; i++)
    {
        Oid            eq_opr = eqOperators[i];
        Oid            eq_function;

        eq_function = get_opcode(eq_opr);
        fmgr_info(eq_function, &eqFunctions[i]);
    }

    return eqFunctions;
}

/*
 * execTuplesHashPrepare
 *        Look up the equality and hashing functions needed for a TupleHashTable.
 *
 * This is similar to execTuplesMatchPrepare, but we also need to find the
 * hash functions associated with the equality operators.  *eqFunctions and
 * *hashFunctions receive the palloc'd result arrays.
 *
 * Note: we expect that the given operators are not cross-type comparisons.
 */
void
execTuplesHashPrepare(int numCols,
                      Oid *eqOperators,
                      FmgrInfo **eqFunctions,
                      FmgrInfo **hashFunctions)
{
    int            i;

    *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
    *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));

    for (i = 0; i < numCols; i++)
    {
        Oid            eq_opr = eqOperators[i];
        Oid            eq_function;
        Oid            left_hash_function;
        Oid            right_hash_function;

        eq_function = get_opcode(eq_opr);
        if (!get_op_hash_functions(eq_opr,
                                   &left_hash_function, &right_hash_function))
            elog(ERROR, "could not find hash function for hash operator %u",
                 eq_opr);
        /* We're not supporting cross-type cases here */
        Assert(left_hash_function == right_hash_function);
        fmgr_info(eq_function, &(*eqFunctions)[i]);
        fmgr_info(right_hash_function, &(*hashFunctions)[i]);
    }
}


/*****************************************************************************
 *        Utility routines for all-in-memory hash tables
 *
 * These routines build hash tables for grouping tuples together (eg, for
 * hash aggregation).  There is one entry for each not-distinct set of tuples
 * presented.
 *****************************************************************************/

/*
 * Construct an empty TupleHashTable
 *
 *    numCols, keyColIdx: identify the tuple fields to use as lookup key
 *    eqfunctions: equality comparison functions to use
 *    hashfunctions: datatype-specific hashing functions to use
 *    nbuckets: initial estimate of hashtable size
 *    additionalsize: size of data stored in ->additional
 *    tablecxt: memory context in which to store table and table entries
 *    tempcxt: short-lived context for evaluation hash and comparison functions
 *
 * The function arrays may be made with execTuplesHashPrepare().  Note they
 * are not cross-type functions, but expect to see the table datatype(s)
 * on both sides.
 *
 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
 * storage that will live as long as the hashtable does.
 */
TupleHashTable
BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
                    FmgrInfo *eqfunctions,
                    FmgrInfo *hashfunctions,
                    long nbuckets, Size additionalsize,
                    MemoryContext tablecxt, MemoryContext tempcxt,
                    bool use_variable_hash_iv)
{
    TupleHashTable hashtable;
    Size        entrysize = sizeof(TupleHashEntryData) + additionalsize;

    Assert(nbuckets > 0);

    /* Limit initial table size request to not more than work_mem */
    nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));

    hashtable = (TupleHashTable)
        MemoryContextAlloc(tablecxt, sizeof(TupleHashTableData));

    hashtable->numCols = numCols;
    hashtable->keyColIdx = keyColIdx;
    hashtable->tab_hash_funcs = hashfunctions;
    hashtable->tab_eq_funcs = eqfunctions;
    hashtable->tablecxt = tablecxt;
    hashtable->tempcxt = tempcxt;
    hashtable->entrysize = entrysize;
    hashtable->tableslot = NULL;    /* will be made on first lookup */
    hashtable->inputslot = NULL;
    hashtable->in_hash_funcs = NULL;
    hashtable->cur_eq_funcs = NULL;
#ifdef __OPENTENBASE__
	hashtable->spilled = false;
	hashtable->spill_set = NULL;
	hashtable->hybrid = false;
#endif

    /*
     * If parallelism is in use, even if the master backend is performing the
     * scan itself, we don't want to create the hashtable exactly the same way
     * in all workers. As hashtables are iterated over in keyspace-order,
     * doing so in all processes in the same way is likely to lead to
     * "unbalanced" hashtables when the table size initially is
     * underestimated.
     */
    if (use_variable_hash_iv)
        hashtable->hash_iv = hash_uint32(ParallelWorkerNumber);
    else
        hashtable->hash_iv = 0;

    hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable);

    return hashtable;
}

/*
 * Find or create a hashtable entry for the tuple group containing the
 * given tuple.  The tuple must be the same type as the hashtable entries.
 *
 * If isnew is NULL, we do not create new entries; we return NULL if no
 * match is found.
 *
 * If isnew isn't NULL, then a new entry is created if no existing entry
 * matches.  On return, *isnew is true if the entry is newly created,
 * false if it existed already.  ->additional_data in the new entry has
 * been zeroed.
 */
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                     bool *isnew)
{
    TupleHashEntryData *entry;
    MemoryContext oldContext;
    bool        found;
    MinimalTuple key;

    /* If first time through, clone the input slot to make table slot */
    if (hashtable->tableslot == NULL)
    {
        TupleDesc    tupdesc;

        oldContext = MemoryContextSwitchTo(hashtable->tablecxt);

        /*
         * We copy the input tuple descriptor just for safety --- we assume
         * all input tuples will have equivalent descriptors.
         */
        tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
        hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
        MemoryContextSwitchTo(oldContext);
    }

    /* Need to run the hash functions in short-lived context */
    oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

    /* set up data needed by hash and match functions */
    hashtable->inputslot = slot;
    hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
    hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;

    key = NULL;                    /* flag to reference inputslot */

    if (isnew)
    {
        entry = tuplehash_insert(hashtable->hashtab, key, &found);

        if (found)
        {
            /* found pre-existing entry */
            *isnew = false;
        }
        else
        {
#ifdef __OPENTENBASE__
			if (hashtable->hybrid)
			{
				if (!entry)
				{
					/* hashtable is full, need to dump it */
					MemoryContextSwitchTo(oldContext);

					return NULL;
				}
			}
			else
			{
				if (!entry)
				{
					elog(ERROR, "could not find entry in hashtable");
				}
			}
#endif
            /* created new entry */
            *isnew = true;
            /* zero caller data */
            entry->additional = NULL;
#ifdef __OPENTENBASE__
			if (hashtable->hybrid)
			{
				/* use our own memorycontext */
				MemoryContextSwitchTo(hashtable->hybridcxt);
			}
			else
			{
				MemoryContextSwitchTo(hashtable->tablecxt);
			}
#else
            MemoryContextSwitchTo(hashtable->tablecxt);
#endif
            /* Copy the first tuple into the table context */
            entry->firstTuple = ExecCopySlotMinimalTuple(slot);
        }
    }
    else
    {
        entry = tuplehash_lookup(hashtable->hashtab, key);
    }

    MemoryContextSwitchTo(oldContext);

    return entry;
}

/*
 * Search for a hashtable entry matching the given tuple.  No entry is
 * created if there's not a match.  This is similar to the non-creating
 * case of LookupTupleHashEntry, except that it supports cross-type
 * comparisons, in which the given tuple is not of the same type as the
 * table entries.  The caller must provide the hash functions to use for
 * the input tuple, as well as the equality functions, since these may be
 * different from the table's internal functions.
 */
TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
                   FmgrInfo *eqfunctions,
                   FmgrInfo *hashfunctions)
{
    TupleHashEntry entry;
    MemoryContext oldContext;
    MinimalTuple key;

    /* Need to run the hash functions in short-lived context */
    oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

    /* Set up data needed by hash and match functions */
    hashtable->inputslot = slot;
    hashtable->in_hash_funcs = hashfunctions;
    hashtable->cur_eq_funcs = eqfunctions;

    /* Search the hash table */
    key = NULL;                    /* flag to reference inputslot */
    entry = tuplehash_lookup(hashtable->hashtab, key);
    MemoryContextSwitchTo(oldContext);

    return entry;
}

/*
 * Compute the hash value for a tuple
 *
 * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
 * table entry, the firstTuple field points to a tuple (in MinimalTuple
 * format).  LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
 * NULL firstTuple field --- that cues us to look at the inputslot instead.
 * This convention avoids the need to materialize virtual input tuples unless
 * they actually need to get copied into the table.
 *
 * Also, the caller must select an appropriate memory context for running
 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
 */
static uint32
TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
{
    TupleHashTable hashtable = (TupleHashTable) tb->private_data;
    int            numCols = hashtable->numCols;
    AttrNumber *keyColIdx = hashtable->keyColIdx;
    uint32        hashkey = hashtable->hash_iv;
    TupleTableSlot *slot;
    FmgrInfo   *hashfunctions;
    int            i;

    if (tuple == NULL)
    {
        /* Process the current input tuple for the table */
        slot = hashtable->inputslot;
        hashfunctions = hashtable->in_hash_funcs;
    }
    else
    {
        /*
         * Process a tuple already stored in the table.
         *
         * (this case never actually occurs due to the way simplehash.h is
         * used, as the hash-value is stored in the entries)
         */
        slot = hashtable->tableslot;
        ExecStoreMinimalTuple(tuple, slot, false);
        hashfunctions = hashtable->tab_hash_funcs;
    }

    for (i = 0; i < numCols; i++)
    {
        AttrNumber    att = keyColIdx[i];
        Datum        attr;
        bool        isNull;

        /* rotate hashkey left 1 bit at each step */
        hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);

        attr = slot_getattr(slot, att, &isNull);

        if (!isNull)            /* treat nulls as having hash key 0 */
        {
            uint32        hkey;

            hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
                                                attr));
            hashkey ^= hkey;
        }
    }

    return hashkey;
}

/*
 * See whether two tuples (presumably of the same hash value) match
 *
 * As above, the passed pointers are pointers to TupleHashEntryData.
 *
 * Also, the caller must select an appropriate memory context for running
 * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
 */
static int
TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
{
    TupleTableSlot *slot1;
    TupleTableSlot *slot2;
    TupleHashTable hashtable = (TupleHashTable) tb->private_data;

    /*
     * We assume that simplehash.h will only ever call us with the first
     * argument being an actual table entry, and the second argument being
     * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
     * could be supported too, but is not currently required.
     */
    Assert(tuple1 != NULL);
    slot1 = hashtable->tableslot;
    ExecStoreMinimalTuple(tuple1, slot1, false);
    Assert(tuple2 == NULL);
    slot2 = hashtable->inputslot;

    /* For crosstype comparisons, the inputslot must be first */
    if (execTuplesMatch(slot2,
                        slot1,
                        hashtable->numCols,
                        hashtable->keyColIdx,
                        hashtable->cur_eq_funcs,
                        hashtable->tempcxt))
        return 0;
    else
        return 1;
}
